cd "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables"
/* Construction of the shift-share instrument in the style of Stuhler et al. */
/*------------------*/
/* BASELINE VERSION */
/*------------------*/
/* Step 1: change in the NATIONAL count of each origin group (D M_ot).
   Input : pnaid.dta
   Output: count.dta with pnaid, year, count and its first difference dcount. */
use "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables\pnaid.dta", clear
/* Panel unit is the origin group; the time index is the rank of each year
   among the years present, so consecutive observed years are one period apart. */
egen origin_id=group(pnaid)
egen period=group(year)
xtset origin_id period
/* First difference between adjacent periods within each origin group;
   missing for a group's first period. */
gen dcount=D.count
/* The helper identifiers are only needed for the difference and are not saved. */
keep pnaid year dcount count
save count.dta, replace

/* Step 2: observed immigrant population per zone (ze) and year.
   Only the rows with pnaid equal to "Natif" and a non-missing zone are used;
   immig is computed on those rows as zepop minus niml.
   NOTE(review): presumably niml on a "Natif" row is the native headcount, so
   the difference is the number of immigrants - confirm against zecount.dta.
   Output: immig.dta with ze, zepop, immig, year. */
use "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables\zecount.dta", clear
keep if pnaid=="Natif" & !missing(ze)
gen immig=zepop-niml
keep ze zepop immig year
save immig.dta, replace

/* Program shift: baseline shift-share prediction.
   Syntax: shift ref75 ref82 ref90 ref99 ref07 ref12 name
     arguments 1-6 : reference year (string value of yref) whose distribution
                     is used for target years 1975, 1982, 1990, 1999, 2007
                     and 2012, in that order
     argument 7    : suffix used to name the output variable and file
   Reads : zecount.dta, count.dta (count.dta must already exist in the
           working directory)
   Writes: share.dta, t1.dta ... t6.dta (intermediate files, left on disk)
           and pit<name>.dta holding pit<name> by ze and year. */
capture program drop shift
program define shift
    /* Reference distribution: one row per zone, origin group and reference year.
       imshare is divided by 100 (presumably stored in percent - confirm). */
    use "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables\zecount.dta", clear
    drop if missing(ze)
    replace imshare=imshare/100
    drop niml zepop
    rename year yref
    save share.dta, replace

    /* For each target year, keep the distribution of the requested reference
       year and attach the national count and change of the target year.
       Positional argument number i supplies the reference year for the
       i-th target year. */
    local targets 1975 1982 1990 1999 2007 2012
    forvalues i = 1/6 {
        local target : word `i' of `targets'
        use share.dta, clear
        keep if yref=="``i''"
        gen year="`target'"
        joinby pnaid year using count.dta
        save t`i'.dta, replace
    }

    /* Stack the six target years. */
    use t1.dta, clear
    append using t2.dta t3.dta t4.dta t5.dta t6.dta
    /* reference share x national change of the origin group */
    gen pojt=imshare*dcount
    /* natives are not part of the immigrant inflow */
    drop if pnaid=="Natif"
    /* sum over all origin groups within zone and year */
    collapse (sum) pit`7' = pojt , by(ze year)
    save pit`7'.dta, replace
end
/* Runs of the shift program.
   Syntax: shift refyear75 refyear82 refyear90 refyear99 refyear07 refyear12 name
   Each run writes pit<name>.dta. */
shift 1968 1968 1968 1968 1968 1968 68
shift 1968 1975 1975 1975 1975 1975 75
shift 1968 1975 1982 1982 1982 1982 82
shift 1968 1975 1982 1990 1990 1990 90
shift 1968 1975 1982 1990 1999 1999 99

/* Put the five predictions side by side, keeping every row of pit68.dta. */
use pit68.dta
foreach sfx in 75 82 90 99 {
    joinby ze year using pit`sfx'.dta, unm(m)
    drop _merge
}
save shift_share1.dta, replace

/* The per-run files are no longer needed once merged. */
foreach sfx in 68 75 82 90 99 {
    rm pit`sfx'.dta
}

/****************************************************/
/* ADD Cluster Propensity Version of the Instrument */
/* Prerequisite: the ACTUAL inflow rate of each origin group in each zone.
   Input : zecount.dta, pnaid.dta
   Output: air.dta with pnaid, ze, year, air, count. */
use "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables\zecount.dta", clear
drop if missing(pnaid)
drop if missing(ze)
/* Attach the national variables of the origin group (count is used below);
   rows without a match in pnaid.dta are kept. */
joinby pnaid year using "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables\pnaid.dta", unm(m)
drop _merge
drop percent
/* Declare the panel only AFTER the merge: joinby may leave the observations
   in a different order, and the D. and L. operators below need the data
   ordered by panel unit and period, which xtset guarantees.
   Panel unit = zone x origin group; period = rank of year among observed years. */
egen time=group(year)
egen ize=group(ze pnaid)
xtset ize time
/* actual inflow rate by PNAID: change in niml within the zone-origin cell
   divided by the previous period's count */
gen air=D.niml/L.count
keep pnaid ze year air count
save air.dta, replace

capture program drop shift2
program define shift2
    /* Shift-share prediction weighted by each origin group's propensity to cluster.
       Syntax: shift2 ref75 ref82 ref90 ref99 ref07 ref12 name
         arguments 1-6 : reference year (string value of yref) used for target
                         years 1975, 1982, 1990, 1999, 2007 and 2012, in order
         argument 7    : suffix used to name the output variables and file
       Reads : zecount.dta, count.dta, air.dta (the last two must already exist)
       Writes: share.dta, t1.dta ... t6.dta, pot.dta (left on disk) and
               ppit<name>.dta holding pit<name>, pit1<name>, pit2<name> by ze and year.
       NOTE(review): corr() and var() are not official egen functions; they are
       presumably provided by the user-written egenmore package - confirm it is
       installed. */

    /* PREDICTED INFLOW RATE per immigrants of the group */
    /* Reference distribution; imshare is divided by 100 (presumably stored in
       percent - confirm). */
    use "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables\zecount.dta", clear
    drop if missing(ze)
    replace imshare=imshare/100
    drop niml zepop
    rename year yref
    save share.dta, replace

    /* For each target year, keep the distribution of the requested reference
       year and attach the national count and change of the target year.
       Positional argument number i supplies the reference year for the
       i-th target year. */
    local targets 1975 1982 1990 1999 2007 2012
    forvalues i = 1/6 {
        local target : word `i' of `targets'
        use share.dta, clear
        keep if yref=="``i''"
        gen year="`target'"
        joinby pnaid year using count.dta
        save t`i'.dta, replace
    }

    use t1.dta, clear
    append using t2.dta t3.dta t4.dta t5.dta t6.dta
    /* predicted inflow: reference share x national change of the origin group */
    gen pojt=imshare*dcount
    keep pnaid ze year pojt
    save temp.dta, replace

    /* merge with actual inflow rate and population count */
    use air.dta, clear
    joinby pnaid ze year using temp.dta, unm(m)
    rm temp.dta
    egen time=group(year)
    egen ize=group(ze pnaid)
    xtset ize time
    /* predicted inflow rate: predicted inflow over the previous period's count */
    gen pir=pojt/L.count
    drop if missing(pir) | missing(air)
    drop _merge
    sort pnaid year

    /* The clustering of an origin group is measured by the slope coefficient
       from a regression of its actual inflow rate on its predicted inflow
       rate, computed within origin group and year as cov(air, pir) / var(pir). */
    /* covariance */
    egen cov=corr(air pir) , cov by(pnaid year)
    /* variance */
    egen var=var(pir) , by(pnaid year)
    /* slope = estimated propensity to cluster */
    gen pot=cov/var
    /* one row per origin group and year */
    contract pnaid year pot

    egen time=group(year)
    egen ipnaid=group(pnaid)
    xtset ipnaid time
    drop _freq
    /* Version 1: average of the estimated propensities to cluster in t and t-1;
       falls back to the value in t when the average is missing; negative
       values are set to 0. */
    gen pot1=(pot+L.pot)/2
    replace pot1=pot if missing(pot1)
    replace pot1=0 if pot1<0
    /* Version 2: estimated propensity to cluster in the following period;
       falls back to the value in t when there is none; negative values are
       set to 0. */
    gen pot2=F.pot
    replace pot2=pot if missing(pot2)
    replace pot2=0 if pot2<0
    save pot.dta, replace

    /* Rebuild the stacked target years and attach the propensities. */
    use t1.dta, clear
    append using t2.dta t3.dta t4.dta t5.dta t6.dta
    joinby pnaid year using pot.dta, unm(m)
    drop _merge
    /* standard version */
    gen pojt=imshare*dcount
    /* version 1: weighted by the average of contemporary and lagged propensity */
    gen pojt1=pot1*imshare*dcount
    /* version 2: weighted by the future propensity */
    gen pojt2=pot2*imshare*dcount
    /* natives are not part of the immigrant inflow */
    drop if pnaid=="Natif"
    /* sum over all origin groups within zone and year */
    collapse (sum) pit`7' = pojt pit1`7' = pojt1 pit2`7' = pojt2 , by(ze year)
    save ppit`7'.dta, replace
end

/* Runs of the shift2 program (same argument layout as shift);
   each run writes ppit<name>.dta. */
shift2 1968 1968 1968 1968 1968 1968 68
shift2 1968 1975 1975 1975 1975 1975 75
shift2 1968 1975 1982 1982 1982 1982 82
shift2 1968 1975 1982 1990 1990 1990 90
shift2 1968 1975 1982 1990 1999 1999 99

/* Put the five runs side by side, keeping every row of ppit68.dta. */
use ppit68.dta
foreach sfx in 75 82 90 99 {
    joinby ze year using ppit`sfx'.dta, unm(m)
    drop _merge
}
/* Only the two propensity-weighted variants are kept in this file. */
drop pit68 pit75 pit82 pit90 pit99
save shift_share2.dta, replace

/* The per-run files are no longer needed once merged. */
foreach sfx in 68 75 82 90 99 {
    rm ppit`sfx'.dta
}

/* Construct the previous instrument using the same method as in the first
   version of the paper.
   Syntax: shiftF ref75 ref82 ref90 ref99 ref07 ref12 name
     arguments 1-6 : reference year (string value of yref) used for target
                     years 1975, 1982, 1990, 1999, 2007 and 2012, in order
     argument 7    : suffix used to name the output variable and file
   Reads : zecount.dta, count.dta, immig.dta (the last two must already exist)
   Writes: share.dta, t0.dta ... t6.dta (left on disk) and opit<name>.dta
           holding opit<name> by ze and year. */
capture program drop shiftF
program define shiftF
    /* Reference distribution; imshare is divided by 100 (presumably stored in
       percent - confirm). */
    use "C:\Users\gregory.verdugo\Google Drive\Recherche\Immigration_Panel\RevisionJEG21\Progs\tables\zecount.dta", clear
    drop if missing(ze)
    replace imshare=imshare/100
    drop niml zepop
    rename year yref
    save share.dta, replace

    /* for 1968 use the observed number of immigrants as the starting level */
    use immig.dta, clear
    keep ze year immig
    keep if year=="1968"
    rename immig AOpit`7'
    save t0.dta, replace

    /* For each target year, keep the distribution of the requested reference
       year and attach the national count of the target year.
       Positional argument number i supplies the reference year for the
       i-th target year. */
    local targets 1975 1982 1990 1999 2007 2012
    forvalues i = 1/6 {
        local target : word `i' of `targets'
        use share.dta, clear
        keep if yref=="``i''"
        gen year="`target'"
        joinby pnaid year using count.dta
        save t`i'.dta, replace
    }

    use t1.dta, clear
    append using t2.dta t3.dta t4.dta t5.dta t6.dta
    /* reference share x national LEVEL of the origin group (count, not dcount) */
    gen Iojt=imshare*count
    /* natives are not part of the immigrant stock */
    drop if pnaid=="Natif"
    /* sum over all origin groups within zone and year */
    collapse (sum) AOpit`7' = Iojt , by(ze year)
    /* add the observed 1968 level so that 1975 can be differenced against it */
    append using t0.dta
    egen time=group(year)
    egen ize=group(ze)
    xtset ize time
    /* the instrument is the period-to-period change in the predicted level */
    gen opit`7'=D.AOpit`7'
    drop AOpit`7'
    drop time ize
    save opit`7'.dta, replace
end
/* Runs of the shiftF program.
   Syntax: shiftF refyear75 refyear82 refyear90 refyear99 refyear07 refyear12 name
   Each run writes opit<name>.dta. */
shiftF 1968 1968 1968 1968 1968 1968 68
shiftF 1968 1975 1975 1975 1975 1975 75
shiftF 1968 1975 1982 1982 1982 1982 82
shiftF 1968 1975 1982 1990 1990 1990 90
shiftF 1968 1975 1982 1990 1999 1999 99

/* Put the five runs side by side, keeping every row of opit68.dta. */
use opit68.dta
foreach sfx in 75 82 90 99 {
    joinby ze year using opit`sfx'.dta, unm(m)
    drop _merge
}
save shift_share3.dta, replace

/* The per-run files are no longer needed once merged. */
foreach sfx in 68 75 82 90 99 {
    rm opit`sfx'.dta
}

/* FINAL TABLE */
/* Start from the observed immigrant counts and attach the three instrument
   files by zone and year; every row of immig.dta is kept even when an
   instrument file has no match for it. */
use immig.dta, clear
forvalues k = 1/3 {
    joinby ze year using shift_share`k'.dta, unm(m)
    drop _merge
}

save shift_share2017.dta, replace
